Skip to contents

Introduction

  • Once you’ve downloaded all the requisite source information per the package’s instructions, you can build the package and re-create the figures and tables published in Roy2025.
  • This vignette is provided to enable anyone to see the source data and methodology behind our publication.

Figure 1

1A - Continental US A11

# Show the state-level plot object (created outside this excerpt).
gg_state

# Interactive table of the state-level values: frequencies are rounded to
# four decimals and columns get reader-friendly labels before display.
out_data |>
  dplyr::mutate(gf = round(gf, digits = 4)) |>
  dplyr::rename(
    State = region,
    `Census Region` = census_region,
    FIPS = fips,
    `Population-Adjusted Genotypic Frequency` = gf
  ) |>
  DT::datatable(
    filter = "top",
    rownames = FALSE,
    # The Buttons extension supplies the CSV / Excel export buttons.
    extensions = "Buttons",
    options = list(
      scrollX = TRUE,
      pageLength = 10,
      dom = "Bfrtip",
      buttons = c("csv", "excel")
    )
  )

1B - A11:01 by County

# County-level table: drop any grouping, round the frequency to four
# decimals, then keep and relabel the five displayed columns in one step.
out_data |>
  dplyr::ungroup() |>
  dplyr::mutate(gf = round(gf, digits = 4)) |>
  dplyr::select(
    `Census Region` = census_region,
    County = county,
    FIPS = fips,
    Allele = allele,
    `Population-Adjusted Genotypic Frequency` = gf
  ) |>
  DT::datatable(
    filter = "top",
    rownames = FALSE,
    # The Buttons extension supplies the CSV / Excel export buttons.
    extensions = "Buttons",
    options = list(
      scrollX = TRUE,
      pageLength = 10,
      dom = "Bfrtip",
      buttons = c("csv", "excel")
    )
  )

1C - NMDP Correlations

NMDP values are not publicly available for privacy reasons. Please contact co-author Martin Maiers with inquiries.

1D - A11:01 by CA County

# Show the California plot object (created outside this excerpt).
gg_a11_in_ca

# Companion table: ungroup, round the frequency to four decimals, and keep
# only the relabelled display columns.
out_data |>
  dplyr::ungroup() |>
  dplyr::mutate(gf = round(gf, digits = 4)) |>
  dplyr::select(
    `Census Region` = census_region,
    County = county,
    FIPS = fips,
    Allele = allele,
    `Population-Adjusted Genotypic Frequency` = gf
  ) |>
  DT::datatable(
    filter = "top",
    rownames = FALSE,
    # The Buttons extension supplies the CSV / Excel export buttons.
    extensions = "Buttons",
    options = list(
      scrollX = TRUE,
      pageLength = 10,
      dom = "Bfrtip",
      buttons = c("csv", "excel")
    )
  )

1E - A11:01 in CA by H4 Hexagon

# Show the plot stored in the `p1` element of `ca_4`.
ca_4$p1

# Per-hexagon table taken from the same `ca_4` object: the summed frequency
# is rounded to four decimals and the four displayed columns are relabelled.
ca_4$genotypic_frequencies_by_hexon |>
  dplyr::ungroup() |>
  dplyr::mutate(
    us_2020_nmdp_gf_sum = round(us_2020_nmdp_gf_sum, digits = 4)
  ) |>
  dplyr::select(
    `Hexagon ID` = hex,
    Allele = allele,
    `Hexagon Population` = total_2020_pop,
    `Population-Adjusted Genotypic Frequency` = us_2020_nmdp_gf_sum
  ) |>
  DT::datatable(
    filter = "top",
    rownames = FALSE,
    # The Buttons extension supplies the CSV / Excel export buttons.
    extensions = "Buttons",
    options = list(
      scrollX = TRUE,
      pageLength = 10,
      dom = "Bfrtip",
      buttons = c("csv", "excel")
    )
  )

1F - A11:01 Catchment

# Show the catchment plot object (created outside this excerpt).
gg_catchment

  • For a table of A11:01 catchment, refer to Table 2 below.

Figure 2

2A - B58:01 by County

# Show the B58:01 county-level plot object (created outside this excerpt).
gg_b58_by_county

2B - B58:01 in MS by County

# Show the Mississippi plot object (created outside this excerpt).
gg_b58_in_ms

# Companion table: ungroup, round the frequency to four decimals, and keep
# only the relabelled display columns.
out_data |>
  dplyr::ungroup() |>
  dplyr::mutate(gf = round(gf, digits = 4)) |>
  dplyr::select(
    `Census Region` = census_region,
    County = county,
    FIPS = fips,
    Allele = allele,
    `Population-Adjusted Genotypic Frequency` = gf
  ) |>
  DT::datatable(
    filter = "top",
    rownames = FALSE,
    # The Buttons extension supplies the CSV / Excel export buttons.
    extensions = "Buttons",
    options = list(
      scrollX = TRUE,
      pageLength = 10,
      dom = "Bfrtip",
      buttons = c("csv", "excel")
    )
  )

2C - B58:01 in MS by Hexagon

# Show the plot stored in the `p1` element of `ms_4`.
ms_4$p1

# Per-hexagon table taken from the same `ms_4` object: the summed frequency
# is rounded to four decimals and the four displayed columns are relabelled.
ms_4$genotypic_frequencies_by_hexon |>
  dplyr::ungroup() |>
  dplyr::mutate(
    us_2020_nmdp_gf_sum = round(us_2020_nmdp_gf_sum, digits = 4)
  ) |>
  dplyr::select(
    `Hexagon ID` = hex,
    Allele = allele,
    `Hexagon Population` = total_2020_pop,
    `Population-Adjusted Genotypic Frequency` = us_2020_nmdp_gf_sum
  ) |>
  DT::datatable(
    filter = "top",
    rownames = FALSE,
    # The Buttons extension supplies the CSV / Excel export buttons.
    extensions = "Buttons",
    options = list(
      scrollX = TRUE,
      pageLength = 10,
      dom = "Bfrtip",
      buttons = c("csv", "excel")
    )
  )

2D - B58:01 Catchment

# Show the catchment plot object (created outside this excerpt).
# NOTE(review): section 1F (A11:01) prints an object with this same name —
# confirm it has been rebuilt for B58:01 before this point.
gg_catchment

Tables

Table 1: United States 2020 Census Adjusted HLA-A*11:01 Genotypic Frequencies

# Table 1: per NMDP race code, US population counts joined to the
# census-adjusted HLA-A*11:01 genotypic frequencies, shown as percentages.
CensusHLA::us_pop_multirace_in_nmdp_codes |>
  dplyr::left_join(
    CensusHLA::nmdp_hla_frequencies_by_race_us_2020_census_adjusted |>
      dplyr::filter(allele == "A*11:01") |>
      dplyr::select(
        allele,
        nmdp_race_code,
        us_2020_percent_pop,
        nmdp_calc_gf,
        us_2020_nmdp_gf
      ) |>
      # NOTE(review): this sorts the right-hand table only; left_join() keeps
      # the left table's row order, so the displayed order may not be
      # descending by population share — confirm the intended ordering.
      dplyr::arrange(dplyr::desc(us_2020_percent_pop)),
    # Spell out the key dplyr previously inferred, which also silences the
    # "Joining with `by = ...`" message in the rendered output.
    by = "nmdp_race_code"
  ) |>
  # Express the population share and both frequencies as percentages with
  # one digit after the decimal point.
  dplyr::mutate(
    dplyr::across(
      c(us_2020_percent_pop, nmdp_calc_gf, us_2020_nmdp_gf),
      \(x) round(x * 100, 1)
    )
  ) |>
  dplyr::select(
    `Ethnic Code` = nmdp_race_code,
    Allele = allele,
    `Single Race Population` = total_single_race_pop,
    `Multi-Race Population` = total_multiple_race_pop,
    `Total Population` = total_2020_pop,
    `Percentage of Total Pop` = us_2020_percent_pop,
    `NMDP Calculated GF` = nmdp_calc_gf,
    `Population-Adjusted GF` = us_2020_nmdp_gf
  ) |>
  DT::datatable(
    filter = "top",
    rownames = FALSE,
    # The Buttons extension supplies the CSV / Excel export buttons.
    extensions = "Buttons",
    options = list(
      scrollX = TRUE,
      pageLength = 10,
      dom = "Bfrtip",
      buttons = c("csv", "excel")
    )
  )

Table 2: HLA-A*11:01 Population-adjusted genotypic frequencies for top 11 NCI Catchment areas.

# Table 2: A11:01 catchment summary, ranked by estimated patient population
# (total 2020 population multiplied by the summed genotypic frequency).
# NOTE(review): if this object is an sf data frame, dplyr::select(-geometry)
# may leave the geometry column attached; that could be related to the
# "data is too big" warning DT emits for this table — confirm.
CensusHLA::a11_catchment_summed$sf_tract_centroids_for_all_states_with_catchment_with_us_population_race_code_percentages_by_tract_summed |>
  dplyr::select(-geometry) |>
  dplyr::mutate(patient_pop = total_2020_pop * us_2020_nmdp_gf_sum) |>
  dplyr::arrange(dplyr::desc(patient_pop)) |>
  DT::datatable(
    filter = "top",
    rownames = FALSE,
    # The Buttons extension supplies the CSV / Excel export buttons.
    extensions = "Buttons",
    options = list(
      scrollX = TRUE,
      # Eleven rows per page so the first page matches the "top 11" caption,
      # consistent with the B58:01 catchment table.
      pageLength = 11,
      dom = "Bfrtip",
      buttons = c("csv", "excel")
    )
  )
## Warning in instance$preRenderHook(instance): It seems your data is too big for
## client-side DataTables. You may consider server-side processing:
## https://rstudio.github.io/DT/server.html

Supplemental Tables

Supplemental Table 1 - California County population-adjusted HLA-A*11:01 Genotypic frequencies

# Supplemental Table 1: ungroup, round the frequency to four decimals, then
# keep and relabel the five displayed columns in one step.
out_data |>
  dplyr::ungroup() |>
  dplyr::mutate(gf = round(gf, digits = 4)) |>
  dplyr::select(
    `Census Region` = census_region,
    County = county,
    FIPS = fips,
    Allele = allele,
    `Population-Adjusted Genotypic Frequency` = gf
  ) |>
  DT::datatable(
    filter = "top",
    rownames = FALSE,
    # The Buttons extension supplies the CSV / Excel export buttons.
    extensions = "Buttons",
    options = list(
      scrollX = TRUE,
      pageLength = 10,
      dom = "Bfrtip",
      buttons = c("csv", "excel")
    )
  )

Supplemental Table 2 - United States 2020 Census Adjusted HLA-B*58:01 Genotypic Frequencies for Mississippi

# Supplemental Table 2: per NMDP race code, population counts joined to the
# census-adjusted HLA-B*58:01 genotypic frequencies for Mississippi, shown
# as percentages.
# NOTE(review): the population columns come from
# `us_pop_multirace_in_nmdp_codes`, whose name suggests US-wide totals —
# confirm they are meant to sit beside Mississippi-specific frequencies.
CensusHLA::us_pop_multirace_in_nmdp_codes |>
  dplyr::left_join(
    CensusHLA::census_adjusted_nmdp_hla_frequencies_by_state |>
      dplyr::filter(
        allele == "B*58:01",
        census_region == "Mississippi"
      ) |>
      dplyr::select(
        allele,
        census_region,
        nmdp_race_code,
        us_2020_percent_pop,
        nmdp_calc_gf,
        us_2020_nmdp_gf
      ) |>
      # NOTE(review): this sorts the right-hand table only; left_join() keeps
      # the left table's row order, so the displayed order may not be
      # descending by population share — confirm the intended ordering.
      dplyr::arrange(dplyr::desc(us_2020_percent_pop)),
    # Spell out the key dplyr previously inferred, which also silences the
    # "Joining with `by = ...`" message in the rendered output.
    by = "nmdp_race_code"
  ) |>
  # Express the population share and both frequencies as percentages with
  # one digit after the decimal point.
  dplyr::mutate(
    dplyr::across(
      c(us_2020_percent_pop, nmdp_calc_gf, us_2020_nmdp_gf),
      \(x) round(x * 100, 1)
    )
  ) |>
  dplyr::select(
    Region = census_region,
    `Ethnic Code` = nmdp_race_code,
    Allele = allele,
    `Single Race Population` = total_single_race_pop,
    `Multi-Race Population` = total_multiple_race_pop,
    `Total Population` = total_2020_pop,
    `Percentage of Total Pop` = us_2020_percent_pop,
    `NMDP Calculated GF` = nmdp_calc_gf,
    `Population-Adjusted GF` = us_2020_nmdp_gf
  ) |>
  DT::datatable(
    filter = "top",
    rownames = FALSE,
    # The Buttons extension supplies the CSV / Excel export buttons.
    extensions = "Buttons",
    options = list(
      scrollX = TRUE,
      pageLength = 10,
      dom = "Bfrtip",
      buttons = c("csv", "excel")
    )
  )

Supplemental Table 3 - Mississippi County population-adjusted HLA-B*58:01 Genotypic frequencies

# Supplemental Table 3: ungroup, round the frequency to four decimals, then
# keep and relabel the five displayed columns in one step.
out_data |>
  dplyr::ungroup() |>
  dplyr::mutate(gf = round(gf, digits = 4)) |>
  dplyr::select(
    `Census Region` = census_region,
    County = county,
    FIPS = fips,
    Allele = allele,
    `Population-Adjusted Genotypic Frequency` = gf
  ) |>
  DT::datatable(
    filter = "top",
    rownames = FALSE,
    # The Buttons extension supplies the CSV / Excel export buttons.
    extensions = "Buttons",
    options = list(
      scrollX = TRUE,
      pageLength = 10,
      dom = "Bfrtip",
      buttons = c("csv", "excel")
    )
  )

Supplemental Table 4 - HLA-B*58:01 Population-adjusted genotypic frequencies by NCI Catchment areas.

# Supplemental Table 4: B58:01 catchment summary, ranked by estimated
# patient population (total 2020 population multiplied by the summed
# genotypic frequency), eleven rows per page.
# NOTE(review): if this object is an sf data frame, dplyr::select(-geometry)
# may leave the geometry column attached; that could be related to the
# "data is too big" warning DT emits for this table — confirm.
CensusHLA::b58_catchment_summed$sf_tract_centroids_for_all_states_with_catchment_with_us_population_race_code_percentages_by_tract_summed |>
  dplyr::select(-geometry) |>
  dplyr::mutate(patient_pop = total_2020_pop * us_2020_nmdp_gf_sum) |>
  dplyr::arrange(dplyr::desc(patient_pop)) |>
  DT::datatable(
    filter = "top",
    rownames = FALSE,
    # The Buttons extension supplies the CSV / Excel export buttons.
    extensions = "Buttons",
    options = list(
      scrollX = TRUE,
      pageLength = 11,
      dom = "Bfrtip",
      buttons = c("csv", "excel")
    )
  )
## Warning in instance$preRenderHook(instance): It seems your data is too big for
## client-side DataTables. You may consider server-side processing:
## https://rstudio.github.io/DT/server.html

References